@@ -103,6 +103,8 @@ module Agents |
||
103 | 103 |
|
104 | 104 |
Set `unzip` to `gzip` to inflate the resource using gzip. |
105 | 105 |
|
106 |
+ Set `consider_http_error_success` to an array of integer HTTP status codes, e.g. `[404]`, to treat responses with those status codes as successful and scrape them as well. |
|
107 |
+ |
|
106 | 108 |
# Liquid Templating |
107 | 109 |
|
108 | 110 |
In Liquid templating, the following variable is available: |
@@ -149,6 +151,7 @@ module Agents |
||
149 | 151 |
errors.add(:base, "either url, url_from_event, or data_from_event are required") unless options['url'].present? || options['url_from_event'].present? || options['data_from_event'].present? |
150 | 152 |
errors.add(:base, "expected_update_period_in_days is required") unless options['expected_update_period_in_days'].present? |
151 | 153 |
validate_extract_options! |
154 |
+ validate_consider_http_success_option! |
|
152 | 155 |
|
153 | 156 |
# Check for optional fields |
154 | 157 |
if options['mode'].present? |
@@ -166,6 +169,27 @@ module Agents |
||
166 | 169 |
validate_web_request_options! |
167 | 170 |
end |
168 | 171 |
|
172 |
# Validates the optional `consider_http_error_success` option.
#
# The option may be absent (nil), in which case nothing is checked. When it is
# present it must be a non-empty Array of unique Integer HTTP status codes;
# otherwise a message is added to `errors` under :base. When the value is
# valid it is also stored in @error_codes_considered_success.
def validate_consider_http_success_option!
  consider_success = options["consider_http_error_success"]
  return if consider_success.nil?

  if !consider_success.is_a?(Array) || consider_success.empty?
    # An empty list is rejected explicitly rather than as a side effect of the
    # integer check below, so the message matches the actual problem.
    errors.add(:base, "Must be an array and specify at least one status code")
  elsif consider_success.uniq.count != consider_success.count
    errors.add(:base, "Duplicate http code found")
  elsif !consider_success.all? { |code| code.is_a?(Integer) }
    # Integer instead of Fixnum: Fixnum was deprecated in Ruby 2.4 and removed
    # in Ruby 3.2, where referencing it raises NameError.
    errors.add(:base, "Please make sure to use only integer values for code")
  else
    @error_codes_considered_success = consider_success
  end
end
|
192 |
+ |
|
169 | 193 |
def validate_extract_options! |
170 | 194 |
extraction_type = (extraction_type() rescue extraction_type(options)) |
171 | 195 |
case extract = options['extract'] |
@@ -273,7 +297,7 @@ module Agents |
||
273 | 297 |
uri = Utils.normalize_uri(url) |
274 | 298 |
log "Fetching #{uri}" |
275 | 299 |
response = faraday.get(uri) |
276 |
- raise "Failed: #{response.inspect}" unless response.success? |
|
300 |
+ raise "Failed: #{response.inspect}" unless consider_response_successful?(response) |
|
277 | 301 |
|
278 | 302 |
interpolation_context.stack { |
279 | 303 |
interpolation_context['_response_'] = ResponseDrop.new(response) |
@@ -353,6 +377,11 @@ module Agents |
||
353 | 377 |
end |
354 | 378 |
|
355 | 379 |
private |
380 |
# Decides whether an HTTP response should be treated as a success.
#
# A response counts as successful when `response.success?` is truthy, or when
# `response.status` is listed in the `consider_http_error_success` option.
#
# The option is read from `options` on every call instead of from an instance
# variable populated during validation, so it is also honoured on instances
# on which validation has not been run.
#
# @param response [#success?, #status] the HTTP response to inspect
# @return [Boolean] truthy when the response should be processed
def consider_response_successful?(response)
  # Array() turns a missing option (nil) into [] so no nil check is needed.
  response.success? || Array(options["consider_http_error_success"]).include?(response.status)
end
|
356 | 385 |
|
357 | 386 |
def handle_event_data(data, event, existing_payload) |
358 | 387 |
handle_data(data, event.payload['url'], existing_payload) |
@@ -40,6 +40,23 @@ describe Agents::WebsiteAgent do |
||
40 | 40 |
expect(@checker).not_to be_valid |
41 | 41 |
end |
42 | 42 |
|
43 |
it 'should validate the consider_http_error_success fields' do
  # A non-empty array of unique integer status codes is accepted.
  @checker.options['consider_http_error_success'] = [404]
  expect(@checker).to be_valid

  # Every rejected shape: a value that is not an array, duplicated codes,
  # non-integer entries (float, string) and an empty list.
  [404, [404, 404], [404.0], ["not_a_code"], []].each do |invalid_value|
    @checker.options['consider_http_error_success'] = invalid_value
    expect(@checker).not_to be_valid
  end
end
|
59 |
+ |
|
43 | 60 |
it "should validate uniqueness_look_back" do |
44 | 61 |
@checker.options['uniqueness_look_back'] = "nonsense" |
45 | 62 |
expect(@checker).not_to be_valid |
@@ -169,6 +186,38 @@ describe Agents::WebsiteAgent do |
||
169 | 186 |
end |
170 | 187 |
end |
171 | 188 |
|
189 |
describe 'consider_http_error_success' do
  it 'should allow scraping from a 404 result' do
    json = {
      'response' => {
        'version' => 2,
        'title' => "hello!"
      }
    }
    zipped = ActiveSupport::Gzip.compress(json.to_json)
    # The stubbed endpoint answers 404 but still carries a (gzipped) JSON body.
    stub_request(:any, /gzip/).to_return(body: zipped, headers: { 'Content-Encoding' => 'gzip' }, status: 404)
    site = {
      'name' => "Some JSON Response",
      'expected_update_period_in_days' => "2",
      'type' => "json",
      'url' => "http://gzip.com",
      'mode' => 'on_change',
      # String key written with `=>`, like every other key in this hash.
      'consider_http_error_success' => [404],
      'extract' => {
        'version' => { 'path' => 'response.version' },
      },
      # no unzip option
    }
    checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
    checker.user = users(:bob)
    checker.save!

    # With 404 listed, the check must extract from the body instead of failing.
    checker.check
    event = Event.last
    expect(event.payload['version']).to eq(2)
  end
end
|
220 |
+ |
|
172 | 221 |
describe 'unzipping' do |
173 | 222 |
it 'should unzip automatically if the response has Content-Encoding: gzip' do |
174 | 223 |
json = { |